Load packages

Read in the data

# Read the combined sensor export, parse its timestamps, keep the columns used
# for plotting, and replace the three site codes with full location names.
jj <- here("data", "sensors", "sensor-data_all.csv") %>%
  read_csv() %>%
  clean_names() %>%
  mutate(
    date_time = mdy_hms(date_time), # parse the date/time text with lubridate
    date = format(date_time, "%m/%d/%Y"), # date-only text column
    time = format(date_time, "%H:%M:%S") # time-only text column
  ) %>%
  select(site, sensor_number, date_time, date, time, temp_c, p_h) %>%
  mutate(
    # rename locations; any other value of `site` is passed through unchanged
    site = case_when(
      site == "LOL" ~ "Lompoc Landing",
      site == "ALG" ~ "Alegria",
      site == "BML" ~ "Bodega Bay",
      TRUE ~ site
    )
  )
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Site = col_character(),
##   `Sensor number` = col_double(),
##   `Download date` = col_character(),
##   `Calibration date` = col_character(),
##   `Temp_(C)` = col_double(),
##   `Voltage#1` = col_double(),
##   TK = col_double(),
##   `S(T)` = col_double(),
##   `Eo(T)` = col_double(),
##   pH = col_double(),
##   `Date time` = col_character()
## )

Plot up the pH

# Set the site order used for plotting (controls the legend order).
jj$site <- factor(jj$site, levels = c("Alegria", "Lompoc Landing", "Bodega Bay"))

# pH through time, one coloured line (plus points) per site.
ggplot(jj, aes(x = date_time, y = p_h, group = site)) +
  geom_line(aes(color = site), size = 0.7) +
  geom_point(aes(color = site), size = 0.5) +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# No `plot` argument: ggsave() writes the most recently displayed plot.
ggsave(here("figures", "sensors", "june-aug.png"), height = 20, width = 40, units = "cm")

Plot temp up for good measure

# Temperature through time, one coloured line (plus points) per site.
ggplot(jj, aes(x = date_time, y = temp_c, group = site)) +
  geom_line(aes(color = site), size = 0.7) +
  geom_point(aes(color = site), size = 0.5) +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# No `plot` argument: ggsave() writes the most recently displayed plot.
ggsave(here("figures", "sensors", "june-aug_temp.png"), height = 20, width = 40, units = "cm")

Plot each site, temp and pH

BML

# Keep only the Bodega Bay (BML) readings.
bml <- jj %>%
  filter(site == "Bodega Bay")

# pH (red) and temperature (blue) against time. Neither series is rescaled:
# sec_axis(~ .) is an identity transform, so the secondary axis shows the same
# numbers as the primary one under a different title.
ggplot(bml, aes(x = date_time)) +
  geom_line(aes(y = p_h), color = "red") +
  geom_line(aes(y = temp_c), color = "blue") +
  scale_y_continuous(
    name = "pH", # first axis name
    sec.axis = sec_axis(~., name = "Temp (C)") # second axis name and features
  ) +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

## Try it with pH and temp sorted as "groups"
# Stack temp_c and p_h into long format: `group` holds the variable name and
# `value` the measurement, so one colour aesthetic can separate the two.
bml2 <- bml %>%
  pivot_longer(
    cols = temp_c:p_h,
    names_to = "group",
    values_to = "value"
  )

ggplot(bml2, aes(x = date_time, y = value, group = group)) +
  geom_line(aes(color = group), size = 0.7) +
  #geom_point(aes(color=group), size=0.5) +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

Let’s come back to this…

Add tide cycles!

Customize HTML

Sites:

  • Bodega (Bodega%20Harbor%20entrance%2C%20California)
  • Lompoc (Point%20Arguello%2C%20California)
  • Alegria (Gaviota%2C%20California)

Glen: number of days to extract

Interval:

  • 10 minutes (00%3A10)
  • 15 minutes (00%3A15)
  • 1 hour (01%3A00)

BML

Scrape tide data from http://tbone.biol.sc.edu/tide/

# Download the tide table for Bodega Harbor entrance and reshape it into two
# columns: date_time (parsed) and tide (numeric).
bml_tide <- "http://tbone.biol.sc.edu/tide/tideshow.cgi?tplotdir=horiz;gx=640;gy=240;caltype=ndp;type=mrare;interval=00%3A15;glen=83;units=feet;year=2021;month=05;day=31;hour=09;min=30;tzone=local;d_year=;d_month=01;d_day=01;d_hour=00;d_min=00;ampm24=24;site=Bodega%20Harbor%20entrance%2C%20California" %>%
  read_html() %>%
  html_elements("pre") %>% # the <pre> element(s) hold the date, time and tide values
  html_text2() %>% # pull out their text
  data.frame() %>% # wrap the text in a data frame (its column is named ".")
  mutate(date_tide = str_split(., pattern = "\n")) %>% # split the text at newlines into a list-column
  unnest(date_tide) %>% # expand the list-column: one row per line of text
  separate(
    date_tide,
    into = c("date", "space", "time", "time_zone", "tide"),
    sep = "\\s" # split each line at every single whitespace character
  ) %>%
  select(-"space", -".") %>% # drop the empty "space" piece and the original text column
  unite("time", "time", "time_zone", sep = " ") %>% # re-join time and time zone
  unite("date_time", "date", "time", sep = " ") %>% # re-join date and time/time zone
  drop_na() %>% # removes the one row that separate() could not split into five pieces
  mutate(
    date_time = ymd_hm(date_time), # parse the date/time text with lubridate
    tide = as.numeric(tide) # coerce tide values from character to numeric
  )
## Warning: Expected 5 pieces. Missing pieces filled with `NA` in 1 rows [7969].

Plot it up

# Tide height through time for the BML tide table.
ggplot(bml_tide, aes(x = date_time, y = tide)) +
  geom_line(size = 0.7) +
  scale_x_datetime(
    breaks = scales::date_breaks("2 days"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  ylab("Tide height") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# No `plot` argument: ggsave() writes the most recently displayed plot.
ggsave(here("figures", "sensors", "bml_tide.png"), height = 20, width = 40, units = "cm")

Combine the tides with the pH/temp values for BML, plot it up!

# Join the BML sensor readings to the tide table; rows found in only one of
# the two tables are kept, with NA in the columns from the other.
bml_all <- full_join(bml, bml_tide)
## Joining, by = "date_time"
bml3 <- bml_all %>%
  # whoops, started off collecting data every 10 minutes and then switched to
  # 15 minutes, so drop rows that lack either a tide or a pH value
  drop_na(c(tide, p_h)) %>%
  pivot_longer(
    cols = temp_c:tide,
    names_to = "data",
    values_to = "value"
  )

# Temperature, pH and tide as three coloured lines on one shared y scale.
ggplot(bml3, aes(x = date_time, y = value, group = data)) +
  geom_line(aes(color = data), size = 0.7) +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# No `plot` argument: ggsave() writes the most recently displayed plot.
ggsave(here("figures", "sensors", "bml_pH_temp_tide.png"), height = 20, width = 40, units = "cm")

Remove measurements where the sensor was out of the water (at a tide lower than -0.5, possibly higher (TBD))

# Keep readings taken at a tide above -0.5 (rows with a missing tide are also
# dropped by filter()), then drop rows without a pH value.
bml_detide <- bml_all %>%
  filter(tide > -0.5) %>%
  drop_na(c(p_h)) # drop observations that don't overlap (10 min vs 15 min sampling interval)

# Tide (red) and temperature (blue), both plotted unscaled on the same y axis.
ggplot(bml_detide, aes(x = date_time)) +
  geom_line(aes(y = tide), color = "red") +
  geom_line(aes(y = temp_c), color = "blue") +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

Let’s filter more, now based on temperature measurements

# Remove temperature anomalies by comparing each reading with its neighbours.
bml_detide_temp <- bml_detide %>%
  arrange(date_time) %>% # make sure observations are in order by date/time
  # change from the previous reading; the first row is compared with itself (0)
  mutate(diff = temp_c - lag(temp_c, default = first(temp_c))) %>%
  filter(diff > -1.0) %>% # remove readings that dropped by 1 or more since the previous one
  # change relative to the next reading; the last row is compared with itself
  # (0). The default used to be first(temp_c), which compared the final
  # reading with the very first one and could drop the last row by accident.
  mutate(diff2 = temp_c - lead(temp_c, default = last(temp_c))) %>%
  filter(
    diff2 < 1,
    diff2 > -1
  )

# pH (red) and temperature (blue), both plotted unscaled on the same y axis.
ggplot(bml_detide_temp, aes(x = date_time)) +
  geom_line(aes(y = p_h), color = "red") +
  geom_line(aes(y = temp_c), color = "blue") +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

Filter even more, based on pH measurements

# Remove pH anomalies: keep rows whose pH changed by less than 0.5 (in either
# direction) since the previous reading.
bml_detide_temp_ph <- bml_detide_temp %>%
  arrange(date_time) %>% # make sure observations are in order by date/time
  # change from the previous pH reading; the first row is compared with itself (0)
  mutate(diff3 = p_h - lag(p_h, default = first(p_h))) %>%
  filter(
    diff3 > -0.5,
    diff3 < 0.5
  ) # remove weird readings on 7/10

# pH, temperature and tide drawn unscaled on one shared y axis, with coloured
# text labels standing in for a legend.
ggplot(bml_detide_temp_ph, aes(x = date_time)) +
  geom_line(aes(y = p_h), color = "#009E73") +
  geom_line(aes(y = temp_c), color = "#D55E00") +
  geom_line(aes(y = tide), color = "#0072B2") +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  # tz = "UTC" matches lubridate's default parsing zone, so the label anchors
  # do not move with the time zone of the machine rendering the figure
  annotate(geom = "text", x = as.POSIXct("2021-05-31 00:10:00", tz = "UTC"), y = 16, hjust = 0, label = "Temp (C)", color = "#D55E00") +
  annotate(geom = "text", x = as.POSIXct("2021-05-31 00:10:00", tz = "UTC"), y = 15, hjust = -0.1, label = "pH", color = "#009E73") +
  annotate(geom = "text", x = as.POSIXct("2021-05-31 00:10:00", tz = "UTC"), y = 14, hjust = 0, label = "Tide", color = "#0072B2") +
  xlab("Date & time") +
  ylab("Value") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# No `plot` argument: ggsave() writes the most recently displayed plot.
ggsave(here("figures", "sensors", "bml_detide_tide-temp-pH.png"), height = 20, width = 40, units = "cm")

LOL

Scrape tide data from http://tbone.biol.sc.edu/tide/

# Download the tide table for Point Arguello and reshape it into two columns:
# date_time (parsed) and tide (numeric).
lol_tide <- "http://tbone.biol.sc.edu/tide/tideshow.cgi?tplotdir=horiz;gx=640;gy=240;caltype=ndp;type=mrare;interval=00%3A15;glen=66;units=feet;year=2021;month=06;day=14;hour=08;min=00;tzone=local;d_year=;d_month=01;d_day=01;d_hour=00;d_min=00;ampm24=24;site=Point%20Arguello%2C%20California" %>%
  read_html() %>%
  html_elements("pre") %>% # the <pre> element(s) hold the date, time and tide values
  html_text2() %>% # pull out their text
  data.frame() %>% # wrap the text in a data frame (its column is named ".")
  mutate(date_tide = str_split(., pattern = "\n")) %>% # split the text at newlines into a list-column
  unnest(date_tide) %>% # expand the list-column: one row per line of text
  separate(
    date_tide,
    into = c("date", "space", "time", "time_zone", "tide"),
    sep = "\\s" # split each line at every single whitespace character
  ) %>%
  select(-"space", -".") %>% # drop the empty "space" piece and the original text column
  unite("time", "time", "time_zone", sep = " ") %>% # re-join time and time zone
  unite("date_time", "date", "time", sep = " ") %>% # re-join date and time/time zone
  drop_na() %>% # removes the one row that separate() could not split into five pieces
  mutate(
    date_time = ymd_hm(date_time), # parse the date/time text with lubridate
    tide = as.numeric(tide) # coerce tide values from character to numeric
  )
## Warning: Expected 5 pieces. Missing pieces filled with `NA` in 1 rows [6337].

Plot it up

# Tide height through time for the LOL tide table.
ggplot(lol_tide, aes(x = date_time, y = tide)) +
  geom_line(size = 0.7) +
  scale_x_datetime(
    breaks = scales::date_breaks("2 days"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  ylab("Tide height") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# No `plot` argument: ggsave() writes the most recently displayed plot.
ggsave(here("figures", "sensors", "lol_tide.png"), height = 20, width = 40, units = "cm")

Combine the tides with the pH/temp values for LOL, plot it up!

# filter all data for LOL
lol <- jj %>%
  filter(site == "Lompoc Landing")

# Join the LOL sensor readings to the tide table, then keep only rows before
# 2021-08-19 02:30:00.
lol_all <- full_join(lol, lol_tide) %>%
  filter(date_time < ymd_hms("2021-08-19 02:30:00"))
## Joining, by = "date_time"
# Long format: `data` holds the variable name (temp_c, p_h or tide) and
# `value` the measurement.
lol_plot <- lol_all %>%
  pivot_longer(
    cols = temp_c:tide,
    names_to = "data",
    values_to = "value"
  )

# Temperature, pH and tide as three coloured lines on one shared y scale.
ggplot(lol_plot, aes(x = date_time, y = value, group = data)) +
  geom_line(aes(color = data), size = 0.7) +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# No `plot` argument: ggsave() writes the most recently displayed plot.
ggsave(here("figures", "sensors", "lol_pH_temp_tide.png"), height = 20, width = 40, units = "cm")

Remove measurements where the sensor pool was disconnected from the ocean (at a tide lower than +0.5, likely higher (TBD))

# Keep readings taken at a tide above 0.5 (rows with a missing tide are also
# dropped by filter()).
lol_detide <- lol_all %>%
  filter(tide > 0.5)

# Tide (red) and temperature (blue), both plotted unscaled on the same y axis.
ggplot(lol_detide, aes(x = date_time)) +
  geom_line(aes(y = tide), color = "red") +
  geom_line(aes(y = temp_c), color = "blue") +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

Check the peaks (the peak is where the tide starts coming back in - when the pool is re-connected to the ocean and the temp drops), and ID where they are so I can filter

# First part of the LOL record (before 2021-06-30 23:00:00), with the
# temperature change to the previous and to the next reading attached.
lol_check <- lol_detide %>%
  filter(date_time < ymd_hms("2021-06-30 23:00:00")) %>%
  arrange(date_time) %>% # make sure observations are in order by date/time
  # change from the previous reading; the first row is compared with itself (0)
  mutate(diff = temp_c - lag(temp_c, default = first(temp_c))) %>%
  # change relative to the next reading; the last row is compared with itself
  # (0). The default used to be first(temp_c), which compared the final
  # reading with the very first one.
  mutate(diff2 = temp_c - lead(temp_c, default = last(temp_c)))
  #filter(diff < 1 | diff > -1 | diff2 < 1 | diff2 > -1)

#Thank you stas g (https://stats.stackexchange.com/a/164830) for this function
#
# Find the positions of local maxima in a numeric vector.
#
# A position p is a candidate when the series stops rising there (the sign of
# the first difference decreases at p). It is kept as a peak only when no
# value in the window from p - m to p + m (clipped to the ends of x) is larger
# than x[p].
#
# Args:
#   x: numeric vector to search.
#   m: half-width of the comparison window, in observations (>= 1). Larger
#      values keep only the more prominent peaks.
#
# Returns:
#   Numeric vector of peak positions (indices into x), in increasing order;
#   length zero when there are none. Candidates whose window contains NA in a
#   way that makes the comparison undecidable are not reported as peaks.
find_peaks <- function(x, m = 3) {
  stopifnot(is.numeric(x), is.numeric(m), length(m) == 1L, m >= 1)
  n <- length(x)

  # -2 / -1 wherever the slope turns from rising (or flat) towards falling
  shape <- diff(sign(diff(x)))
  candidates <- which(shape < 0)

  is_peak <- vapply(
    candidates,
    function(i) {
      lo <- max(i - m + 1, 1) # left edge of the window, clipped to the start
      hi <- min(i + m + 1, n) # right edge of the window, clipped to the end
      # the candidate itself sits at i + 1; compare it with everything else in
      # the window. isTRUE() turns an NA comparison into "not a peak" rather
      # than an error.
      isTRUE(all(x[c(lo:i, (i + 2):hi)] <= x[i + 1]))
    },
    logical(1)
  )

  candidates[is_peak] + 1
}

# Row positions (within lol_check) of the temperature peaks.
pk <- find_peaks(lol_check$temp_c, m = 50) #set a high threshold bc we need it...

# Flag the peak rows. This starts from lol_check itself (same filter, ordering
# and diff/diff2 columns as before) so the row numbers in `pk` always refer to
# the rows they were computed from.
lol_check_peak <- lol_check %>%
  #filter(diff < 1 | diff > -1 | diff2 < 1 | diff2 > -1)
  mutate(peak = as.numeric(row_number() %in% pk)) # 1 if the row number was returned by find_peaks, else 0

# Tide (red) and temperature (blue) for the checked period, with detected
# temperature peaks marked by a large red point and a vertical line.
ggplot(lol_check_peak, aes(x = date_time)) +
  geom_line(aes(y = tide), color = "red") +
  geom_line(aes(y = temp_c), color = "blue") +
  #geom_line(aes(y=diff), color="green") +
  #geom_line(aes(y=diff2), color="orange") +
  # if it's a peak, color it red and make it big; other points get size 0
  geom_point(aes(y = temp_c, color = ifelse(peak > 0, "red", "black"), size = ifelse(peak > 0, 2, 0))) +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  # the colour and size aesthetics already hold literal values, so use them as-is
  scale_color_identity() +
  scale_size_identity() +
  geom_vline(aes(xintercept = date_time), lol_check_peak %>% filter(peak == 1)) + #if it's a peak, draw a vertical line
  # y breaks every 0.5 across the tide range; na.rm guards against a missing tide value
  scale_y_continuous(
    breaks = round(
      seq(
        min(lol_check_peak$tide, na.rm = TRUE),
        max(lol_check_peak$tide, na.rm = TRUE),
        by = 0.5
      ),
      1
    )
  )

Looks like anything below a 1.5 tide height is definitely disconnected from the ocean

So now, let’s filter based on anomalous pH measurements

# Keep readings taken at a tide above 1.5 and attach the pH change since the
# previous reading (diff3) for inspection; nothing is filtered on diff3 here.
lol_detide_temp_ph <- lol_all %>%
  filter(tide > 1.5) %>%
  arrange(date_time) %>% # make sure observations are in order by date/time
  mutate(diff3 = p_h - lag(p_h, default = first(p_h))) #doesn't look like pH values jump extraordinarily

# pH (blue) and its reading-to-reading change (green).
ggplot(lol_detide_temp_ph, aes(x = date_time)) +
  geom_line(aes(y = p_h), color = "blue") +
  geom_line(aes(y = diff3), color = "green") +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

And plot it all up

# pH, temperature and tide drawn unscaled on one shared y axis, with coloured
# text labels standing in for a legend.
ggplot(lol_detide_temp_ph, aes(x = date_time)) +
  geom_line(aes(y = p_h), color = "#009E73") +
  geom_line(aes(y = temp_c), color = "#D55E00") +
  geom_line(aes(y = tide), color = "#0072B2") +
  scale_x_datetime(
    breaks = scales::date_breaks("1 week"),
    # %M = minutes; the earlier "%H:%m" printed the month number after the hour
    labels = date_format("%m/%d %H:%M")
  ) +
  # tz = "UTC" matches lubridate's default parsing zone, so the label anchors
  # do not move with the time zone of the machine rendering the figure
  annotate(geom = "text", x = as.POSIXct("2021-8-14 00:01:00", tz = "UTC"), y = 20, hjust = 0, label = "Temp (C)", color = "#D55E00") +
  annotate(geom = "text", x = as.POSIXct("2021-8-14 00:01:00", tz = "UTC"), y = 19, hjust = -0.1, label = "pH", color = "#009E73") +
  annotate(geom = "text", x = as.POSIXct("2021-8-14 00:01:00", tz = "UTC"), y = 18, hjust = 0, label = "Tide", color = "#0072B2") +
  xlab("Date & time") +
  ylab("Value") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# No `plot` argument: ggsave() writes the most recently displayed plot.
ggsave(here("figures", "sensors", "lol_detide_tide-temp.png"), height = 20, width = 40, units = "cm")

ALG

Scrape tide data from http://tbone.biol.sc.edu/tide/

# Download the tide table for Gaviota and reshape it into two columns:
# date_time (parsed) and tide (numeric).
alg_tide <- "http://tbone.biol.sc.edu/tide/tideshow.cgi?tplotdir=horiz;gx=640;gy=240;caltype=ndp;type=mrare;interval=00%3A15;glen=66;units=feet;year=2021;month=06;day=14;hour=08;min=00;tzone=local;d_year=;d_month=01;d_day=01;d_hour=00;d_min=00;ampm24=24;site=Gaviota%2C%20California" %>%
  read_html() %>%
  html_elements("pre") %>% # the <pre> element(s) hold the date, time and tide values
  html_text2() %>% # pull out their text
  data.frame() %>% # wrap the text in a data frame (its column is named ".")
  mutate(date_tide = str_split(., pattern = "\n")) %>% # split the text at newlines into a list-column
  unnest(date_tide) %>% # expand the list-column: one row per line of text
  separate(
    date_tide,
    into = c("date", "space", "time", "time_zone", "tide"),
    sep = "\\s" # split each line at every single whitespace character
  ) %>%
  select(-"space", -".") %>% # drop the empty "space" piece and the original text column
  unite("time", "time", "time_zone", sep = " ") %>% # re-join time and time zone
  unite("date_time", "date", "time", sep = " ") %>% # re-join date and time/time zone
  drop_na() %>% # removes the one row that separate() could not split into five pieces
  mutate(
    date_time = ymd_hm(date_time), # parse the date/time text with lubridate
    tide = as.numeric(tide) # coerce tide values from character to numeric
  )
## Warning: Expected 5 pieces. Missing pieces filled with `NA` in 1 rows [6337].